# bash completion for GNU tar                              -*- shell-script -*-
#
# General info
# ============
#
# The "old" style arguments
# -------------------------
#
# We don't "advise" the old tar option format by default for GNU tar, example:
#
#   'tar czfT /tmp/archive.tar patterns.txt'
#
# We rather advise the 'tar -czf /tmp/archive.tar -T patterns.txt' format of
# arguments.  Though, if the user starts the 'first' tar argument without a
# leading dash, we treat the command line appropriately.
#
#
# long/short options origin
# -------------------------
#
# For GNU tar, everything is parsed from `tar --help` output so not so much
# per-distribution work should be needed.  The _parse_help does not seem to be
# good enough so parsed here directly.
#
#
# FIXME: --starting-file (-K) (should be matched for extraction only)
# FIXME: handle already used (at least short) options
# FIXME: Test-cases for make check.
#        - check for no global variable pollution
# FIXME: why PS4='$BASH_SOURCE:$LINENO: ' shows sometimes negative lines?
# FIXME: timeout on tarball listing
# FIXME: cache 'tar --help' parsing results into global variables
# FIXME: at least 'tar -<tab>' should show some helpful text (apart from just
#        plain option suggestions)
# FIXME: short option completion should be more intuitive
#        - verbose mode option should be advised multiple times
#        - mode option should be advised only once
#        - format option should be advised only once
#        ...

# Record one option parsed from `tar --help` in the matching accumulator.
#
# $1 - option as printed in the help text, e.g. '-f', '--file=ARCHIVE' or
#      '--occurrence[=N]'
# $2 - argument class of the option; it selects the accumulator variable
#      {long,short}_arg_$2, which is expected to exist in the caller's scope
#
# Long options are appended space-separated (a '=ARG' suffix is reduced to a
# bare '=', an '[=ARG]' suffix is dropped); short options are appended as bare
# letters without separator.  '-?' is silently ignored.  Returns 1, with a
# message on stderr, when $1 does not start with a dash.
__gtar_parse_help_opt()
{
    local opttype=long arg=$2 opt=$1 separator=" " optvar

    case "$opt" in
        --*) ;;

        -\?)
            return
            ;;
        -*)
            opttype=short
            opt=${opt##-}
            separator=
            ;;
        *)
            echo "bash_completion: ${FUNCNAME[0]}: unknown option $opt" >&2
            return 1
            ;;
    esac

    # Remove arguments.
    opt=${opt//\[*/}
    opt=${opt//=*/=}

    # Basic sanity.
    opt=${opt//\"*/}
    opt=${opt//\'*/}
    opt=${opt//\;*/}

    optvar=${opttype}_arg_${arg}

    # Append via printf -v and indirect expansion instead of eval, so that the
    # option text is stored verbatim and never re-parsed as shell code.
    printf -v "$optvar" '%s' "${!optvar-}$separator$opt"
}

# Feed the leading options of a pre-processed help line to
# __gtar_parse_help_opt.
#
# $1 - whitespace separated option list, possibly followed by other text
# $2 - argument class passed through unchanged for every option
__gtar_parse_help_line()
{
    local word
    local -a fields
    while read -ra fields; do
        for word in "${fields[@]}"; do
            # the first word that is not an option ends this line
            [[ $word == -* ]] || break
            __gtar_parse_help_opt "$word" "$2"
        done
    done <<<"$1"
}

# Fill the option tables from the output of `tar --help`.
#
# Each help line that starts with an option list is reduced to that list and
# passed to __gtar_parse_help_line together with the argument class of the
# line ('none', 'opt' or 'req').  Afterwards long_opts and short_opts are
# assembled from the {long,short}_arg_{none,opt,req} variables.  All of these
# variables are expected to be declared by the caller.
__gnu_tar_parse_help()
{
    local str line arg
    # read -r keeps backslashes literal; without it a help line ending in '\'
    # would be joined with the following line and that line's options lost.
    while IFS= read -r line; do
        # Ok, this requires some comment probably.  The GNU help output prints
        # options on lines beginning with spaces.  After that, there is one
        # or more options separated by ', ' separator string.  We are matching
        # like this then:  ^<spaces>(<separator>?<option>)+<whatever>$
        if [[ $line =~ ^[[:blank:]]{1,10}(((,[[:blank:]])?(--?([\]\[a-zA-Z0-9?=-]+))(,[[:space:]])?)+).*$ ]]; then

            line=${BASH_REMATCH[1]}
            str="${line//,/ }"

            # Detect that all options on this line accept arguments (and whether
            # the arguments are required or not).  Note that only long option
            # description in GNU help output mentions arguments.  So the $line
            # variable may contain e.g. '-X, --XXX[=NAME], -XXX2[=NAME]'.
            arg=none
            if [[ $line =~ --[A-Za-z0-9-]+(\[?)= ]]; then
                # a '[' right before the '=' marks the argument as optional
                if [[ -n ${BASH_REMATCH[1]} ]]; then
                    arg=opt
                else
                    arg=req
                fi
            fi

            __gtar_parse_help_line "$str" "$arg"
        fi
    done <<<"$(tar --help)"

    # long_opts is only ever used as a word list, so a single blank between
    # the three groups is sufficient.
    long_opts="$long_arg_none $long_arg_opt $long_arg_req"

    short_opts="$short_arg_none$short_arg_opt$short_arg_req"
}

# Hack: harvest the --warning keywords from what tar prints when it is given
# an empty keyword.  For every keyword found, one line "KEYWORD no-KEYWORD" is
# written to stdout.
__gtar_parse_warnings()
{
    local line
    while IFS= read line; do
        if [[ $line =~ ^[[:blank:]]*-[[:blank:]]*[\`\']([a-zA-Z0-9-]+)\'$ ]]; then
            printf '%s no-%s\n' "${BASH_REMATCH[1]}" "${BASH_REMATCH[1]}"
        fi
    done < <(LC_ALL=C tar --warning= 2>&1)
}

# Print the final character of $1 (an empty line when $1 is empty).
__tar_last_char()
{
    printf '%s\n' "${1: -1}"
}

# Detect and pre-parse an "old style" (dash-less) first argument.
#
# Reads cword, cur and words from the caller's scope and updates these
# caller-owned variables:
#   old_opt_progress  set to 1 when the word being completed is the first
#                     argument and it does not start with a dash
#   old_opt_used      set to 1 when words[1] is non-empty and does not start
#                     with a dash
#   old_opt_parsed    receives, in order, every character of words[1] for
#                     which __tar_is_argreq succeeds
__tar_parse_old_opt()
{
    local first_word char

    # current word is the first word
    [[ $cword -eq 1 && -n $cur && ${cur:0:1} != '-' ]] &&
        old_opt_progress=1

    # check that first argument does not begin with "-"
    first_word=${words[1]}
    [[ -n $first_word && ${first_word:0:1} != "-" ]] &&
        old_opt_used=1

    # parse the old option (if present) contents to allow later code expect
    # corresponding arguments
    if ((old_opt_used == 1)); then
        # Consume the word exactly one character at a time.  Stripping by
        # offset (rather than using the character as a removal pattern) keeps
        # glob characters such as '*' from swallowing the rest of the word.
        while [[ -n $first_word ]]; do
            char=${first_word:0:1}
            first_word=${first_word:1}
            if __tar_is_argreq "$char"; then
                old_opt_parsed+=("$char")
            fi
        done
    fi
}

# Analyse the whole command line to find the main operation mode.
#
# Arguments are the completion words; $1 is the program name.  The result is
# stored in the caller's tar_mode and tar_mode_arg, which are left untouched
# when no mode is found.
__tar_preparse_cmdline()
{
    local word letters mode_chars="ctxurdA" at_first=1

    shift # progname

    __tar_parse_old_opt

    for word in "$@"; do
        if [[ $word == --delete || $word == --test-label ]]; then
            tar_mode=${word#--}
            tar_mode_arg=$word
            break
        elif [[ $word == --* ]]; then
            : # any other long option is skipped
        elif [[ $word == -*[$mode_chars]* ]]; then
            # first mode letter found in a dashed short option bundle wins
            letters=${word//[^$mode_chars]/}
            tar_mode=${letters:0:1}
            tar_mode_arg=$word
            break
        elif [[ $word == *[$mode_chars]* ]] && ((at_first == 1)); then
            # Only the first arg may be "MODE" without leading dash
            letters=${word//[^$mode_chars]/}
            tar_mode=${letters:0:1}
            tar_mode_arg=$word
        fi
        at_first=0
    done
}

# Generate completions for the argument of -f/--file.
#
# $1 - pattern of archive file name extensions handed to _filedir
__tar_file_option()
{
    if [[ $tar_mode == c ]]; then
        # no need to advise user to re-write existing tarball
        _filedir -d
    else
        _filedir "$1"
    fi
}

# Succeeds (returns 0) if the given option requires an argument, fails
# (returns 1) otherwise.  The option must be passed without '=' or a value.
#
# $1 - option in one of the forms 'c', '-c' or '--create'
#
# Short options are looked up in short_arg_req (a string of option letters),
# long options in long_arg_req (blank separated words of the form '--name=').
# Both lookups are literal: the option text is never treated as a pattern.
__tar_is_argreq()
{
    local opt=$1
    case "$opt" in
        -[A-Za-z0-9?] | [A-Za-z0-9?])
            [[ $short_arg_req == *"${opt#-}"* ]] && return 0
            ;;
        --*)
            # Pad the list with blanks so that its first and last entries
            # can be matched as whole words too.
            [[ " $long_arg_req " == *" $opt= "* ]] && return 0
            ;;
    esac

    return 1
}

# Complete a bundle of short options, or the operation mode when none has
# been chosen yet.  Called only for short parameters.
#
# Reads cur, tar_mode, short_opts, old_opt_progress and (if set) basic_tar
# from the caller's scope, stores the candidates in COMPREPLY and always
# returns 0.
__tar_complete_mode()
{
    local mode_letters="ctxurdA" typed all_short remaining \
        candidates="" prefix="" idx

    # basic (non-GNU) tars only get the reduced set of modes
    [[ -v basic_tar ]] && mode_letters="ctx"

    # Remove prefix when needed
    typed=${cur#-}

    # -c -z -x ... => czx
    all_short=${short_opts//[- ]/}

    # No mode given so far: offer the mode letters appended to ${cur}
    if [[ $tar_mode == none ]]; then
        # when user passed something like 'tar cf' do not put the '-' before
        if [[ -z $cur && ! -v basic_tar ]]; then
            prefix=-
        fi

        for ((idx = 0; idx < ${#mode_letters}; idx++)); do
            candidates+=" $prefix$cur${mode_letters:idx:1}"
        done

        COMPREPLY=($(compgen -W "$candidates"))
        return 0
    fi

    # The last short option requires argument, like '-cf<TAB>'.  Cut the
    # completion here to enforce argument processing.
    if ((old_opt_progress == 0)) &&
        __tar_is_argreq "$(__tar_last_char "$cur")"; then
        COMPREPLY=("$cur")
        return 0
    fi

    # Drop the letters already typed, then the mode letters (a mode is known
    # to be set at this point).
    remaining=${all_short//[$typed]/}
    remaining=${remaining//[$mode_letters]/}

    for ((idx = 0; idx < ${#remaining}; idx++)); do
        candidates+=" $cur${remaining:idx:1}"
    done

    COMPREPLY=($(compgen -W "$candidates"))

    return 0
}

# Complete the current word against the long options in long_opts.
#
# Stores the matches in COMPREPLY and returns the status of the compgen call.
# When the first match ends in '=', appending a space is switched off so the
# value can be typed right away.
__gtar_complete_lopts()
{
    local status
    COMPREPLY=($(compgen -W "$long_opts" -- "$cur"))
    status=$?
    if [[ ${COMPREPLY-} == *= ]]; then
        compopt -o nospace
    fi
    return $status
}

# Complete a bundle of non-mode short options.
#
# Reads short_opts and cur from the caller's scope.  Every letter of
# short_opts that is neither a mode letter nor already present in the current
# word is offered appended to the current word; the result goes to COMPREPLY.
# The return status is that of the compgen call.
__gtar_complete_sopts()
{
    local generated="" short_mode_opts="ctxurdA" used=${cur#-} i c

    # The candidate letters are computed here from short_opts; no variable of
    # __tar_complete_mode is in scope when this function runs.
    for ((i = 0; i < ${#short_opts}; i++)); do
        c=${short_opts:i:1}
        # skip separators, mode letters and letters that were typed already
        if [[ $c == - || $c == " " || $short_mode_opts == *"$c"* ||
            $used == *"$c"* ]]; then
            continue
        fi
        generated+=" $cur$c"
    done

    COMPREPLY=($(compgen -W "$generated" -- "$cur"))
}

# Try to complete the current word as an option or as the operation mode.
#
# Returns 0 when the word has been dealt with (the caller should stop), the
# status of __gtar_complete_lopts for long options, and 1 when nothing was
# done here.
__tar_try_mode()
{
    if [[ $cur == -* ]]; then
        # posix tar supports neither long nor dashed short options
        if [[ -v basic_tar ]]; then
            return 0
        fi

        if [[ $cur == --* ]]; then
            __gtar_complete_lopts
            return $?
        fi

        if __tar_complete_mode; then
            return 0
        fi
        return 1
    fi

    # dash-less word: only the first argument, or any word while no mode is
    # known yet, is treated as a mode/option bundle
    if [[ $cword -eq 1 || $tar_mode == none ]]; then
        __tar_complete_mode && return 0
    fi
    return 1
}

# Derive prev from an old style option bundle.
#
# With old style arguments the values follow the bundle in the order of the
# letters that take one:
#   $ tar cfTC ARCHIVE_FILE PATTERNS_FILE CHANGE_DIR
# While one of those values is being completed, prev is replaced by the
# matching letter with a dash in front (e.g. '-C').
__tar_adjust_PREV_from_old_option()
{
    local slot

    ((old_opt_used == 1)) || return 0

    # the word after the bundle (cword == 2) belongs to old_opt_parsed[0]
    slot=$((cword - 2))
    ((slot >= 0 && slot < ${#old_opt_parsed[@]})) || return 0

    prev="-${old_opt_parsed[slot]}"
}

# Succeeds when tar_mode is one of the modes that operate on an existing
# archive: x, d, t or delete.
__tar_extract_like_mode()
{
    case $tar_mode in
        x | d | t | delete)
            return 0
            ;;
    esac
    return 1
}

# Offer the member names of the archive named on the command line.
#
# Only acts in the modes accepted by __tar_extract_like_mode; otherwise it
# returns 1.  Reads words, ext and cur from the caller's scope.  When a word
# matching *.$ext is found, the archive is listed with "<tar> tf <archive>"
# and COMPREPLY is filled from that listing.
__tar_try_list_archive()
{
    local tarball tarbin untar i

    __tar_extract_like_mode || return 1

    # This all is just to approach directory completion from "virtual"
    # directory structure in tarball (for which the _filedir is unusable)

    # Load the command line into the positional parameters: $1 is the tar
    # command as typed, the remaining parameters are its arguments.
    set -- "${words[@]}"
    tarbin=$1
    untar="tf"
    shift

    # The first argument matching *.$ext is taken to be the archive.
    for i in "$@"; do
        if [[ $i == *.$ext ]]; then
            tarball=$i
            break
        fi
    done
    # tarball is declared local above but only assigned on a match, so -v
    # tells whether an archive was found.
    if [[ -v tarball ]]; then
        # Split the generated list on newlines only.
        local IFS=$'\n'
        COMPREPLY=($(compgen -o filenames -W "$(
            $tarbin $untar "$tarball" 2>/dev/null |
                while read line; do
                    printf "%q\n" "$(printf %q"\n" "$line")"
                done
        )" -- "$(printf "%q\n" "$cur")"))
        return 0
    fi
    # NOTE(review): there is no explicit return on this path, so when no
    # archive is found the function ends with the status of the 'if' above
    # (0) - confirm that callers rely on this.
}

# Reduce a bundle of short options in prev to its last option, so that only
# the option a following argument would belong to remains.
__tar_cleanup_prev()
{
    # anything that is not a dash followed by option letters stays as it is
    [[ $prev =~ ^-[a-zA-Z0-9?]*$ ]] || return 0

    # transform '-caf' ~> '-f'
    prev=-${prev: -1}
}

# Work out which archive file name extensions to offer and store the result,
# an extglob pattern, in ext (a variable owned by the caller).
#
# Reads COMP_WORDS[0], tar_mode_arg and words[1]; for bsdtar it also consults
# __tar_extract_like_mode.
__tar_detect_ext()
{
    # Default: all archive extensions known here, with or without a
    # compression suffix.
    local tars='@(@(tar|gem|spkg)?(.@(Z|[bgx]z|bz2|lz?(ma|o)|zst))|t@([abglx]z|b?(z)2|zst)|cbt)'
    if [[ ${COMP_WORDS[0]} == ?(*/)bsdtar ]]; then
        # https://github.com/libarchive/libarchive/wiki/LibarchiveFormats
        # Replace the closing ')' of the pattern with further alternatives;
        # the ')' is added back by one of the two branches below.
        tars=${tars/%\)/|pax|cpio|iso|zip|@(j|x)ar|mtree|a|7z|warc}
        if __tar_extract_like_mode; then
            # read only
            tars+="|l@(ha|zh)|rar|cab)"
        else
            # write only
            tars+="|shar)"
        fi
    fi
    ext="$tars"

    # Narrow the pattern down according to the letters of the mode argument.
    # The first matching branch wins.
    case "$tar_mode_arg" in
        --*)
            # Should never happen?
            ;;
        ?(-)*[cr]*f)
            # Mode argument containing 'c' or 'r' and ending in 'f': start
            # from the uncompressed extensions and refine by the compression
            # letter, which is looked up in words[1] (not in tar_mode_arg).
            ext='@(tar|gem|spkg|cbt)'
            case ${words[1]} in
                *a*) ext="$tars" ;;
                *z*) ext='t?(ar.)gz' ;;
                *Z*) ext='ta@(r.Z|z)' ;;
                *[jy]*) ext='t@(?(ar.)bz?(2)|b2)' ;;
                *J*) ext='t?(ar.)xz' ;;
            esac
            ;;
        +([^ZzJjy])f)
            # Pass through using defaults above
            ;;
        *[Zz]*f)
            ext='@(@(t?(ar.)|gem.|spkg.)@(gz|Z)|taz)'
            ;;
        *[jy]*f)
            ext='@(@(t?(ar.)|gem.)bz?(2)|spkg|tb2)'
            ;;
        *[J]*f)
            ext='@(@(tar|gem|spkg).@(lzma|xz)|t[lx]z)'
            ;;
    esac
}

# Completion function for GNU tar.
#
# Declares the option tables and the mode state as locals, fills them through
# __gnu_tar_parse_help and __tar_preparse_cmdline, and then tries in order:
# the argument of the previous option, the operation mode and short options,
# long options, archive member names and finally plain file names.
# If the variable BASHCOMP_TAR_OPT_DEBUG is set, the run is traced with set -x.
_gtar()
{
    local long_opts short_opts \
        long_arg_none="" long_arg_opt="" long_arg_req="" \
        short_arg_none="" short_arg_opt="" short_arg_req="" \
        tar_mode tar_mode_arg old_opt_progress=0 \
        old_opt_used=0 old_opt_parsed=()

    # Main mode, e.g. -x or -c (extract/creation)
    local tar_mode=none

    # The mode argument, e.g. -cpf or -c
    # FIXME: handle long options
    local tar_mode_arg=

    if [[ -v BASHCOMP_TAR_OPT_DEBUG ]]; then
        set -x
        local PS4='$BASH_SOURCE:$LINENO: '
    fi

    local cur prev words cword split

    _init_completion -s || return

    # Fill the {long,short}_{opts,arg*}
    __gnu_tar_parse_help

    __tar_preparse_cmdline "${words[@]}"

    # Pattern of archive extensions; assigned by __tar_detect_ext
    local ext

    __tar_detect_ext

    while true; do # just-for-easy-break while, not looping
        __tar_adjust_PREV_from_old_option
        __tar_posix_prev_handle && break
        __tar_cleanup_prev

        # Handle all options *REQUIRING* argument.  Optional arguments are up to
        # user (TODO: is there any sane way to deal with this?).  This case
        # statement successes only if there already is PREV.
        case $prev in
            --add-file | --exclude | --exclude-ignore | \
                --exclude-ignore-recursive | --exclude-tag | \
                --exclude-tag-all | --exclude-tag-under | --files-from | \
                --exclude-from | --listed-incremental | --group-map | \
                --mtime | --owner-map | --volno-file | --newer | \
                --after-date | --index-file | -!(-*)[TXg])
                _filedir
                break
                ;;
            --directory | -!(-*)C)
                _filedir -d
                break
                ;;
            --hole-detection)
                COMPREPLY=($(compgen -W 'raw seek' -- "$cur"))
                break
                ;;
            --to-command | --info-script | --new-volume-script | \
                --rmt-command | --rsh-command | --use-compress-program | \
                -!(-*)[FI])
                compopt -o filenames
                COMPREPLY=($(compgen -c -- "$cur"))
                break
                ;;
            --atime-preserve)
                COMPREPLY=($(compgen -W 'replace system' -- "$cur"))
                break
                ;;
            --group)
                COMPREPLY=($(compgen -g -- "$cur"))
                break
                ;;
            --owner)
                COMPREPLY=($(compgen -u -- "$cur"))
                break
                ;;
            --sort)
                COMPREPLY=($(compgen -W 'none name inode' -- "$cur"))
                break
                ;;
            --file | -!(-*)f)
                __tar_file_option "$ext"
                break
                ;;
            --format | -!(-*)H)
                COMPREPLY=($(compgen -W 'gnu oldgnu pax posix ustar v7' \
                    -- "$cur"))
                break
                ;;
            --quoting-style)
                COMPREPLY=($(compgen -W 'literal shell shell-always
                    shell-escape shell-escape-always c c-maybe escape locale
                    clocale' -- "$cur"))
                break
                ;;
            --totals)
                COMPREPLY=($(
                    compgen -W 'SIGHUP SIGQUIT SIGINT SIGUSR1 SIGUSR2' -- "$cur"
                ))
                break
                ;;
            --warning)
                COMPREPLY=($(compgen -W "$(__gtar_parse_warnings)" -- "$cur"))
                break
                ;;
            --*)
                # parameter with required argument but no completion yet
                [[ " $long_arg_req " =~ \ $prev=\  ]] && break

                # parameter with optional argument passed with =, something like
                # --occurrence=*<TAB> which is not handled above
                [[ " $long_arg_opt " =~ \ $prev\  ]] && break

                # if there is some unknown option with '=', for example
                # (literally) user does --nonexistent=<TAB>, we do not want
                # continue also
                $split && break

                # Most probably, when code goes here, the PREV variable contains
                # some string from "$long_arg_none" and we want continue.
                ;;
            -!(-*)[a-zA-Z0-9?])
                # argument required but no completion yet
                [[ $short_arg_req =~ ${prev##-} ]] && break
                ;;
        esac

        # safety belts
        case "$cur" in
            -[a-zA-Z0-9]=*)
                # e.g. 'tar -c -f=sth' does not what user could expect
                break
                ;;
        esac

        # Handle the main operational mode of tar.  We should do it as soon as
        # possible.
        __tar_try_mode && break

        # handle others
        case "$cur" in
            --*)
                __gtar_complete_lopts
                break
                ;;
            -*)
                # called only if it is *not* first parameter
                __gtar_complete_sopts
                break
                ;;
        esac

        # the first argument must be "mode" argument or --param, if any of those
        # was truth - the 'break' statement would have been already called
        ((cword == 1)) && break

        __tar_try_list_archive && break

        # file completion on relevant files
        if [[ $tar_mode != none ]]; then
            _filedir
        fi

        break
    done # just-for-easy-break while

    # Switch the trace off again if it was enabled above
    if [[ -v BASHCOMP_TAR_OPT_DEBUG ]]; then
        set +x
    fi
}

# Handle the two option arguments common to all supported tars.
#
# After '-f' the archive file is completed through __tar_file_option, after
# '-b' nothing is offered.  Returns 0 in both cases and 1 for any other prev.
__tar_posix_prev_handle()
{
    if [[ $prev == "-f" ]]; then
        __tar_file_option "$ext"
        return 0
    elif [[ $prev == "-b" ]]; then
        return 0
    fi

    return 1
}

# Completion function for tar implementations handled as "basic" tar.
#
# Works with a fixed option set instead of parsing help output, marks the run
# with basic_tar=yes for the helpers, and then tries in order: the argument of
# -f/-b, the operation mode, archive member names, plain file names.
_posix_tar()
{
    # Option tables and mode state shared with the helper functions;
    # tar_mode is the main mode (e.g. x or c), tar_mode_arg the word it was
    # found in (e.g. -cpf or -c).
    local long_opts short_opts basic_tar \
        long_arg_none="" long_arg_opt long_arg_req="" \
        short_arg_none short_arg_opt short_arg_req \
        tar_mode=none tar_mode_arg="" \
        old_opt_progress=0 old_opt_used=1 old_opt_parsed=()
    local cur prev words cword split

    _init_completion -s || return

    basic_tar=yes

    # relatively compatible modes are {c,t,x}
    # relatively compatible options {b,f,m,v,w}
    short_arg_req="fb"
    short_arg_none="wmv"
    short_opts=$short_arg_req$short_arg_none

    __tar_preparse_cmdline "${words[@]}"

    local ext
    __tar_detect_ext
    __tar_adjust_PREV_from_old_option

    # The first step that succeeds ends the completion; plain file completion
    # on relevant files is the last resort.
    __tar_posix_prev_handle || __tar_try_mode || __tar_try_list_archive ||
        _filedir
}

# Bootstrap completion for 'tar'.
#
# Asks the command being completed for its version to decide between the GNU
# and the basic implementation, runs that completion for the current request,
# registers it directly for 'tar' and finally removes this function.
_tar()
{
    local cmd=${COMP_WORDS[0]} func=_posix_tar line
    line="$($cmd --version 2>/dev/null)"
    if [[ $line == *GNU* ]]; then
        func=_gtar
    fi
    $func "$@"

    # Install real completion for subsequent completions
    local -a dirnames_opt=()
    if [[ ${COMP_TAR_INTERNAL_PATHS-} ]]; then
        dirnames_opt=(-o dirnames)
    fi
    complete -F $func "${dirnames_opt[@]}" tar
    unset -f _tar
}

# Register the completion functions.  'tar' starts with the _tar bootstrap,
# the other commands are bound to their implementation right away.  A
# non-empty COMP_TAR_INTERNAL_PATHS adds '-o dirnames' to every registration.
if [[ -z ${COMP_TAR_INTERNAL_PATHS-} ]]; then
    complete -F _tar tar
    complete -F _gtar gtar
    complete -F _posix_tar bsdtar star
else
    complete -F _tar -o dirnames tar
    complete -F _gtar -o dirnames gtar
    complete -F _posix_tar -o dirnames bsdtar star
fi

# ex: filetype=sh
